{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import re\n",
    "import copy\n",
    "import dtale\n",
    "import os\n",
    "#pd.set_option(\"Max_columns\", None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Table 3. Descriptive Statistics- IVs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "dt_master = pd.read_excel(\"data/Minerva_Clustering_20220209_MARupdated_Vdemadded.xlsx\")\n",
    "dt_pitf = pd.read_excel(\"data/PITF_2yrpeace_2yrlag.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>polity_squared</th>\n",
       "      <th>PopYouthBulgeBy15_new</th>\n",
       "      <th>gdppc_ln</th>\n",
       "      <th>nAC_perc</th>\n",
       "      <th>imr_annual_mean_centered</th>\n",
       "      <th>sum_PDIS_EDIS</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6260.000000</td>\n",
       "      <td>6260.000000</td>\n",
       "      <td>6260.000000</td>\n",
       "      <td>6260.000000</td>\n",
       "      <td>6260.000000</td>\n",
       "      <td>6260.000000</td>\n",
       "      <td>6260.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1989.027157</td>\n",
       "      <td>55.319649</td>\n",
       "      <td>41.344049</td>\n",
       "      <td>8.674893</td>\n",
       "      <td>0.256262</td>\n",
       "      <td>3.797022</td>\n",
       "      <td>7.713738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>14.606777</td>\n",
       "      <td>31.906626</td>\n",
       "      <td>8.924901</td>\n",
       "      <td>1.291389</td>\n",
       "      <td>0.303323</td>\n",
       "      <td>42.457701</td>\n",
       "      <td>8.761244</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1960.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>17.885186</td>\n",
       "      <td>2.041869</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-84.436250</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1977.000000</td>\n",
       "      <td>36.000000</td>\n",
       "      <td>33.799559</td>\n",
       "      <td>7.675125</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-28.580936</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1990.000000</td>\n",
       "      <td>49.000000</td>\n",
       "      <td>44.820292</td>\n",
       "      <td>8.643223</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>-3.658023</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2002.000000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>48.056279</td>\n",
       "      <td>9.656438</td>\n",
       "      <td>0.428571</td>\n",
       "      <td>35.011667</td>\n",
       "      <td>12.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>2012.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>56.409996</td>\n",
       "      <td>12.367041</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>136.063750</td>\n",
       "      <td>57.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              year  polity_squared  PopYouthBulgeBy15_new     gdppc_ln  \\\n",
       "count  6260.000000     6260.000000            6260.000000  6260.000000   \n",
       "mean   1989.027157       55.319649              41.344049     8.674893   \n",
       "std      14.606777       31.906626               8.924901     1.291389   \n",
       "min    1960.000000        0.000000              17.885186     2.041869   \n",
       "25%    1977.000000       36.000000              33.799559     7.675125   \n",
       "50%    1990.000000       49.000000              44.820292     8.643223   \n",
       "75%    2002.000000       81.000000              48.056279     9.656438   \n",
       "max    2012.000000      100.000000              56.409996    12.367041   \n",
       "\n",
       "          nAC_perc  imr_annual_mean_centered  sum_PDIS_EDIS  \n",
       "count  6260.000000               6260.000000    6260.000000  \n",
       "mean      0.256262                  3.797022       7.713738  \n",
       "std       0.303323                 42.457701       8.761244  \n",
       "min       0.000000                -84.436250       0.000000  \n",
       "25%       0.000000                -28.580936       0.000000  \n",
       "50%       0.200000                 -3.658023       6.000000  \n",
       "75%       0.428571                 35.011667      12.000000  \n",
       "max       1.000000                136.063750      57.000000  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dt_master[(dt_master.year<=2012) & (dt_master.year>=1960)][dt_master.columns[:-2]].dropna().describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>polity_squared</th>\n",
       "      <th>PopYouthBulgeBy15_new</th>\n",
       "      <th>gdppc_ln</th>\n",
       "      <th>nAC_perc</th>\n",
       "      <th>imr_annual_mean_centered</th>\n",
       "      <th>sum_PDIS_EDIS</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>76.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>76.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1989.368421</td>\n",
       "      <td>40.486842</td>\n",
       "      <td>44.630487</td>\n",
       "      <td>8.098927</td>\n",
       "      <td>0.373630</td>\n",
       "      <td>22.507192</td>\n",
       "      <td>12.434211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>13.439089</td>\n",
       "      <td>26.021270</td>\n",
       "      <td>6.877194</td>\n",
       "      <td>1.199196</td>\n",
       "      <td>0.315258</td>\n",
       "      <td>40.152124</td>\n",
       "      <td>10.742235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1965.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>24.309952</td>\n",
       "      <td>2.367905</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-72.982143</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1978.000000</td>\n",
       "      <td>22.750000</td>\n",
       "      <td>43.514676</td>\n",
       "      <td>7.291633</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-13.981012</td>\n",
       "      <td>5.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1990.000000</td>\n",
       "      <td>49.000000</td>\n",
       "      <td>46.865368</td>\n",
       "      <td>8.128100</td>\n",
       "      <td>0.366667</td>\n",
       "      <td>22.561779</td>\n",
       "      <td>8.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2000.000000</td>\n",
       "      <td>49.000000</td>\n",
       "      <td>48.553502</td>\n",
       "      <td>9.012391</td>\n",
       "      <td>0.578571</td>\n",
       "      <td>54.472120</td>\n",
       "      <td>19.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>2014.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>53.236774</td>\n",
       "      <td>10.253272</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>109.168627</td>\n",
       "      <td>40.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              year  polity_squared  PopYouthBulgeBy15_new   gdppc_ln  \\\n",
       "count    76.000000       76.000000              76.000000  76.000000   \n",
       "mean   1989.368421       40.486842              44.630487   8.098927   \n",
       "std      13.439089       26.021270               6.877194   1.199196   \n",
       "min    1965.000000        0.000000              24.309952   2.367905   \n",
       "25%    1978.000000       22.750000              43.514676   7.291633   \n",
       "50%    1990.000000       49.000000              46.865368   8.128100   \n",
       "75%    2000.000000       49.000000              48.553502   9.012391   \n",
       "max    2014.000000      100.000000              53.236774  10.253272   \n",
       "\n",
       "        nAC_perc  imr_annual_mean_centered  sum_PDIS_EDIS  \n",
       "count  76.000000                 76.000000      76.000000  \n",
       "mean    0.373630                 22.507192      12.434211  \n",
       "std     0.315258                 40.152124      10.742235  \n",
       "min     0.000000                -72.982143       0.000000  \n",
       "25%     0.000000                -13.981012       5.000000  \n",
       "50%     0.366667                 22.561779       8.000000  \n",
       "75%     0.578571                 54.472120      19.000000  \n",
       "max     1.000000                109.168627      40.000000  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dt_pitf[dt_pitf.columns[:-4]].dropna().describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Table 4 & 5 & 6. Descriptive Statistics- Sensitivity Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['eucl_pitf_agnes', 'eucl_pitf_pam', 'eucl_pitf_vdem_agnes', 'eucl_pitf_vdem_pam', 'eucl_prio_agnes', 'eucl_prio_pam', 'eucl_prio_vdem_agnes', 'eucl_prio_vdem_pam', 'gower_pitf_agnes', 'gower_pitf_pam', 'gower_pitf_vdem_agnes', 'gower_pitf_vdem_pam', 'gower_prio_agnes', 'gower_prio_pam', 'gower_prio_vdem_agnes', 'gower_prio_vdem_pam'])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cluster_char = dict()\n",
    "for file in os.listdir(\"output\"):\n",
    "    if \"3clust_\" in file:\n",
    "        method = file[7:-5]\n",
    "        f_output = pd.ExcelFile(f\"output/{file}\")\n",
    "        dt_agnes = f_output.parse(sheet_name=\"3clust_agnes\")\n",
    "        dt_pam = f_output.parse(sheet_name=\"3clust_PAM\")\n",
    "        cluster_char[f\"{method}_agnes\"] = dt_agnes\n",
    "        cluster_char[f\"{method}_pam\"] = dt_pam\n",
    "cluster_char.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "c_var = [\n",
    "    'polity_squared', \n",
    "          'PopYouthBulgeBy15_new', \n",
    "          'gdppc_ln',\n",
    "          'nAC_perc',\n",
    "          'imr_annual_mean_centered',\n",
    "          'sum_PDIS_EDIS'\n",
    "]\n",
    "c_var_mm = [*[f\"{c}_mean\" for c in c_var], *[f\"{c}_median\" for c in c_var], *[f\"{c}_scale_mean\" for c in c_var]]\n",
    "dt_summary = []\n",
    "for k in cluster_char:\n",
    "    dt = copy.deepcopy(cluster_char[k])\n",
    "    cl = [*[i for i in dt.columns if i.endswith(\"_cluster\")],*[\"n\"], *c_var_mm]\n",
    "    dt = dt[cl]\n",
    "    dt.insert(0, \"method\", k)\n",
    "    if k == \n",
    "    dt_summary.append(dt)\n",
    "dt_summary = pd.concat(dt_summary)\n",
    "#\n",
    "dt_summary.agnes_cluster = dt_summary.agnes_cluster.fillna(dt_summary.pam_cluster)\n",
    "dt_summary.round(2).to_excel(\"output/3clusters_summary.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "dt_summary_fix = pd.read_excel(\"output/3clusters_summary clusterfix 20220210.xlsx\")\n",
    "c_mean = [c for c in dt_summary_fix.columns if c.endswith(\"_mean\") and \"_scale_\" not in c ]\n",
    "c_median = [c for c in dt_summary_fix.columns if c.endswith(\"_median\") and \"_scale_\" not in c ]\n",
    "dt_mean = dt_summary_fix[[*['method', 'Cluster', 'n'],*c_mean]].copy()\n",
    "dt_median = dt_summary_fix[[*['method', 'Cluster', 'n'],*c_median]].copy()\n",
    "dt_mean.columns = [c[:-5] if c.endswith(\"_mean\") else c for c in dt_mean.columns]\n",
    "dt_median.columns = [c.replace(\"_median\", \"\") for c in dt_median.columns]\n",
    "dt_mean.method = dt_mean.method.str.replace(\"_\", \"+\").str.upper()\n",
    "dt_median.method = dt_median.method.str.replace(\"_\", \"+\").str.upper()\n",
    "##\n",
    "clu_map = {\n",
    "    1: \"Anocratic - Younger - Less Developed\",\n",
    "    2:\"Older - Wealthier - Moderate Discrimination\",\n",
    "    3:\"Higher Discrimination - Worse Neighborhood - Younger\"\n",
    "}\n",
    "dt_mean.Cluster = dt_mean.Cluster.replace(clu_map)\n",
    "dt_median.Cluster = dt_median.Cluster.replace(clu_map)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "dt_mean_compile = dt_mean.copy(True)\n",
    "for cl in dt_mean_compile.Cluster.unique():\n",
    "    for m in dt_mean_compile.method.unique():\n",
    "        for c in dt_mean.columns[3:]:\n",
    "            dt_mean_compile.loc[(dt_mean_compile.method==m)&(dt_mean_compile.Cluster==cl), c]  = \\\n",
    "            str(dt_mean.loc[(dt_mean.method==m)&(dt_mean.Cluster==cl), c].values[0]) + \\\n",
    "            \" \" + \"(\" + str(dt_median.loc[(dt_median.method==m)&(dt_median.Cluster==cl), c].values[0]) + \")\"\n",
    "dt_mean_compile.to_excel(\"output/3clusters_descriptive.xlsx\",index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Table 7 Std of Clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "c_scale_mean = [c for c in dt_summary_fix.columns if c.endswith(\"scale_mean\") ]\n",
    "dt_scalemean = dt_summary_fix[[*['method', 'Cluster', 'n'],*c_scale_mean]].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Cluster                                0.000000\n",
       " n                                      9.689986\n",
       " polity_squared_scale_mean              0.095601\n",
       " PopYouthBulgeBy15_new_scale_mean       0.040451\n",
       " gdppc_ln_scale_mean                    0.026005\n",
       " nAC_perc_scale_mean                    0.094408\n",
       " imr_annual_mean_centered_scale_mean    0.039644\n",
       " sum_PDIS_EDIS_scale_mean               0.026833\n",
       " dtype: float64, 0.05382364570644602)"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dt_scalemean[dt_scalemean.Cluster==2].mean()\n",
    "dt_scalemean[dt_scalemean.Cluster==1].std(), np.mean(dt_scalemean[dt_scalemean.Cluster==1].std().values[2:])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Table 8- Case Membership"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [],
   "source": [
    "membership_list = []\n",
    "for file in os.listdir(\"output\"):\n",
    "    if \"3clust_\" in file:\n",
    "        method = file[7:-5]\n",
    "        f_output = pd.ExcelFile(f\"output/{file}\")\n",
    "        dt_membership = f_output.parse(sheet_name=\"membership\")\n",
    "        dt_pam = dt_membership[[\"country\", \"year\", \"pam_cluster\"]].copy()\n",
    "        dt_pam.columns = [\"country\", \"year\", \"cluster_raw\"]\n",
    "        dt_pam.insert(0, \"method\", f\"{method}_pam\")\n",
    "        dt_agnes = dt_membership[[\"country\", \"year\", \"agnes_cluster\"]].copy()\n",
    "        dt_agnes.columns = [\"country\", \"year\", \"cluster_raw\"]\n",
    "        dt_agnes.insert(0, \"method\", f\"{method}_agnes\")\n",
    "        dt_cl = pd.concat([dt_pam, dt_agnes])\n",
    "        #\n",
    "        membership_list.append(dt_cl)\n",
    "dt_member = pd.concat(membership_list)\n",
    "##\n",
    "dt_summary_fix = pd.read_excel(\"output/3clusters_summary clusterfix 20220210.xlsx\")\n",
    "dt_member_fix = dt_summary_fix[[\"method\", \"Cluster\",\"cluster_raw\", \"n\"]]\n",
    "dt_member = pd.merge(left = dt_member, right = dt_member_fix, on=[\"method\", \"cluster_raw\"], how=\"left\")\n",
    "# dt_member.groupby([\"method\", \"Cluster\"]).count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [],
   "source": [
    "dt_member_pitf = dt_member[dt_member.method.str.contains(\"_pitf_\")][[\"country\", \"year\", \"Cluster\"]].reset_index(drop=True).copy()\n",
    "cl1_list =[]\n",
    "cl2_list = []\n",
    "cl3_list = []\n",
    "n_cl_list = []\n",
    "for i in range(dt_member_pitf.shape[0]):\n",
    "    c = dt_member_pitf.country[i]\n",
    "    y = dt_member_pitf.year[i]\n",
    "    cl1 = dt_member_pitf[(dt_member_pitf.country==c)&(dt_member_pitf.year==y)&(dt_member_pitf.Cluster==1)].shape[0]\n",
    "    cl2 = dt_member_pitf[(dt_member_pitf.country==c)&(dt_member_pitf.year==y)&(dt_member_pitf.Cluster==2)].shape[0]\n",
    "    cl3 = dt_member_pitf[(dt_member_pitf.country==c)&(dt_member_pitf.year==y)&(dt_member_pitf.Cluster==3)].shape[0]\n",
    "    n_cl = [cl1,cl2,cl3].count(0) \n",
    "    if n_cl==2:\n",
    "        n_cl_list.append(1)\n",
    "    elif n_cl ==1:\n",
    "        n_cl_list.append(2)\n",
    "    elif n_cl ==0:\n",
    "        n_cl_list.append(3)\n",
    "    else:\n",
    "        raise\n",
    "    cl1_list.append(cl1)\n",
    "    cl2_list.append(cl2)\n",
    "    cl3_list.append(cl3)\n",
    "dt_member_pitf[\"Anocratic - Younger - Less Developed\"] = cl1_list\n",
    "dt_member_pitf[\"Older - Wealthier - Moderate Discrimination\"] = cl2_list\n",
    "dt_member_pitf[\"Higher Discrimination - Worse Neighborhood - Younger\"] = cl3_list\n",
    "dt_member_pitf[\"Number of Clusters\"] = n_cl_list\n",
    "dt_member_pitf.drop(columns=[\"Cluster\"]).drop_duplicates().to_excel(\"output/3clusters_membership clusterfix pitf.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [],
   "source": [
    "dt_member_pitf = dt_member[dt_member.method.str.contains(\"_prio_\")][[\"country\", \"year\", \"Cluster\"]].reset_index(drop=True).copy()\n",
    "cl1_list =[]\n",
    "cl2_list = []\n",
    "cl3_list = []\n",
    "n_cl_list = []\n",
    "for i in range(dt_member_pitf.shape[0]):\n",
    "    c = dt_member_pitf.country[i]\n",
    "    y = dt_member_pitf.year[i]\n",
    "    cl1 = dt_member_pitf[(dt_member_pitf.country==c)&(dt_member_pitf.year==y)&(dt_member_pitf.Cluster==1)].shape[0]\n",
    "    cl2 = dt_member_pitf[(dt_member_pitf.country==c)&(dt_member_pitf.year==y)&(dt_member_pitf.Cluster==2)].shape[0]\n",
    "    cl3 = dt_member_pitf[(dt_member_pitf.country==c)&(dt_member_pitf.year==y)&(dt_member_pitf.Cluster==3)].shape[0]\n",
    "    n_cl = [cl1,cl2,cl3].count(0) \n",
    "    if n_cl==2:\n",
    "        n_cl_list.append(1)\n",
    "    elif n_cl ==1:\n",
    "        n_cl_list.append(2)\n",
    "    elif n_cl ==0:\n",
    "        n_cl_list.append(3)\n",
    "    else:\n",
    "        raise\n",
    "    cl1_list.append(cl1)\n",
    "    cl2_list.append(cl2)\n",
    "    cl3_list.append(cl3)\n",
    "dt_member_pitf[\"Anocratic - Younger - Less Developed\"] = cl1_list\n",
    "dt_member_pitf[\"Older - Wealthier - Moderate Discrimination\"] = cl2_list\n",
    "dt_member_pitf[\"Higher Discrimination - Worse Neighborhood - Younger\"] = cl3_list\n",
    "dt_member_pitf[\"Number of Clusters\"] = n_cl_list\n",
    "dt_member_pitf.drop(columns=[\"Cluster\"]).drop_duplicates().to_excel(\"output/3clusters_membership clusterfix prio.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "# country_year_prio_matched = []\n",
    "# cases_prio = member_prio.country_year_prio.unique()\n",
    "# cases_pitf = member_pitf.country_year_pitf.unique()\n",
    "# for case in cases_pitf:\n",
    "#     if case in cases_prio:\n",
    "#         country_year_prio_matched.append(case)\n",
    "#     else:\n",
    "#         c = case.split(\"_\")[0]\n",
    "#         y = int(case.split(\"_\")[1])\n",
    "#         if c not in c_prio:\n",
    "#             country_year_prio_matched.append(None)\n",
    "#         else:\n",
    "#             for gap in range(1,7):\n",
    "#                 if gap >5:\n",
    "#                     country_year_prio_matched.append(None)\n",
    "#                     break\n",
    "#                 else:\n",
    "#                     if f\"{c}_{y+gap}\" in cases_prio:\n",
    "#                         country_year_prio_matched.append(f\"{c}_{y+gap}\")\n",
    "#                         break\n",
    "#                     elif f\"{c}_{y-gap}\" in cases_prio:\n",
    "#                         country_year_prio_matched.append(f\"{c}_{y-gap}\")\n",
    "#                         break\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
